/*------------------------------------------------------------------------------

Project: Wouter Schakel and Armen Hakhverdian (2018). Ideological Congruence
	and Socio-Economic Inequality. European Political Science Review, 10(3):
	441-465. doi: 10.1017/S1755773918000036

Data: Dutch Parliamentary Election Studies of 2006 and 1971-2006, downloaded from
	https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:45027 (2006)
	https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:34143 (1971-2006)

	Dutch Parliament Studies of 2006, 2001 and 1990, obtained from Rudy Andeweg
	(not publicly available!)
	
Description: This do-file contains the commands to reproduce the analyses. It
	is divided into these parts:
	
	1. Data preparation
	2. Calculating congruence
	3. Bootstrap
	4. Control variables
	
------------------------------------------------------------------------------*/

* Folder that holds the input datasets; the datasets created by this do-file
* are written to the same folder. Replace the placeholder before running.
global data	"YOUR PATH HERE"

/*------------------------------------------------------------------------------
 1. Data preparation
------------------------------------------------------------------------------*/

/* The data preparation involves constructing a dataset that combines questions
on issue attitudes from the Dutch Parliamentary Election Study and Dutch Parlia-
ment Studies. The resulting dataset consists of pairs of variables for which
congruence is calculated. In each pair, one variable is the distribution of a
group of citizens on a given issue, while the other variable is the distribution
of parliamentarians on the same issue. For each issue, there are seven groups
(all citizens; low, middle and high incomes; low, middle and high education).
There are six issues in 2006, five issues in 1998/2001 and two issues in
1989/1990, so thirteen in total. This produces 7*13=91 pairs of variables.

The commands below combine the relevant variables. Important note: since the
Dutch Parliament Studies is not publicly available, this do-file manually
generates the variables from that dataset. We obviously did not perform the
original analyses this way, as we had access to the data. This is only done
here so that all subsequent steps in the analysis can be replicated. */

* Dutch Parliamentary Election Studies 2006: issue positions of all citizens.
* Values 990-999 on the five issue items are set to missing. The left-right
* item (V691) is collapsed from 0-10 to a seven-point scale: 0 -> 1, 1-2 -> 2,
* 3-4 -> 3, 5 -> 4, 6-7 -> 5, 8-9 -> 6, 10 -> 7 (values 2 and 3 are not listed
* in the recode rules and therefore stay as they are).
* Forward slashes are used in file paths because they work on every platform.
use "$data/NKO2006.dta", clear
recode V145 V195 V185 V165 V135 (990/999=.), gen(v1_1 v8_1 v15_1 v22_1 v29_1)
recode V691 (0=1) (1=2) (4=3) (5=4) (6 7=5) (8 9=6) (10=7) (11/999=.), gen(v36_1)

* n is a plain row counter; it is the key on which all group files are merged
gen n=_n
keep v1_1 v8_1 v15_1 v22_1 v29_1 v36_1 n

* The tempfile path is quoted so that a temp folder with spaces does not break
tempfile nko
save "`nko'", replace

* The same 2006 items, saved separately for low (i=1), middle (i=2) and high
* (i=3) incomes. Every issue owns a block of seven variable numbers starting
* at 1, 8, 15, 22, 29 and 36; the income groups take positions 2-4 of a block,
* i.e. block start + i.
forval i=1/3 {
	use "$data/NKO2006.dta", clear

	* Names of the new variables for the five issue items and for left-right
	local issuevars
	forval base=1(7)29 {
		local issuevars `issuevars' v`=`base'+`i''_1
	}
	local lrvar v`=36+`i''_1

	recode V145 V195 V185 V165 V135 (990/999=.), gen(`issuevars')
	recode V691 (0=1) (1=2) (4=3) (5=4) (6 7=5) (8 9=6) (10=7) (11/999=.), gen(`lrvar')

	* Income terciles; respondents outside group i (or with 998) are dropped
	recode V413 (1/7=1) (8/14=2) (15/20=3) (998=.), gen(inc)
	keep if inc==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `issuevars' `lrvar' n
	tempfile inc`i'
	save "`inc`i''", replace
}

* The same 2006 items, saved separately for low (i=1), middle (i=2) and high
* (i=3) education. The education groups take positions 5-7 of each issue's
* block of seven variable numbers, i.e. block start + i + 3.
forval i=1/3 {
	use "$data/NKO2006.dta", clear

	* Names of the new variables for the five issue items and for left-right
	local issuevars
	forval base=1(7)29 {
		local issuevars `issuevars' v`=`base'+`i'+3'_1
	}
	local lrvar v`=39+`i''_1

	recode V145 V195 V185 V165 V135 (990/999=.), gen(`issuevars')
	recode V691 (0=1) (1=2) (4=3) (5=4) (6 7=5) (8 9=6) (10=7) (11/999=.), gen(`lrvar')

	* Three education levels; respondents outside group i (or with 997) are dropped
	recode V430 (1 2=1) (3 4=2) (5=3) (997=.), gen(edu)
	keep if edu==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `issuevars' `lrvar' n
	tempfile edu`i'
	save "`edu`i''", replace
}

* Dutch Parliamentary Election Studies 1998: issue positions of all citizens,
* taken from the cumulative 1971-2006 file. Values 96 and 97 on the four issue
* items are set to missing. The left-right item (V46_2) is collapsed to seven
* points: 1 -> 1, 2-3 -> 2, 4 -> 3, 5-6 -> 4, 7 -> 5, 8-9 -> 6, 10 -> 7
* (values 1 and 2 are not listed in the recode rules and stay as they are).
use "$data/NKO1971-2006.dta", clear
keep if A1==1998
recode V38_10 V42_11 V41_10 V37_10 (96 97 = .), gen(v43_1 v50_1 v57_1 v64_1)
recode V46_2 (3=2) (4=3) (5 6=4) (7=5) (8 9=6) (10=7) (11/999=.), gen(v71_1)

* Row counter used as the merge key later on
gen n=_n
keep v43_1 v50_1 v57_1 v64_1 v71_1 n
tempfile nko98
save "`nko98'", replace

* The 1998 items for low (i=1), middle (i=2) and high (i=3) incomes. The issue
* blocks start at variable numbers 43, 50, 57, 64 and 71; the income groups
* take positions 2-4 of a block, i.e. block start + i.
forval i=1/3 {
	use "$data/NKO1971-2006.dta", clear
	keep if A1==1998

	* Names of the new variables for the four issue items and for left-right
	local issuevars
	forval base=43(7)64 {
		local issuevars `issuevars' v`=`base'+`i''_1
	}
	local lrvar v`=71+`i''_1

	recode V38_10 V42_11 V41_10 V37_10 (96 97 = .), gen(`issuevars')
	recode V46_2 (3=2) (4=3) (5 6=4) (7=5) (8 9=6) (10=7) (11/999=.), gen(`lrvar')

	* Three income groups; respondents outside group i (or with 96/97) are dropped
	recode D49 (1/7=1) (8/10=2) (11 12=3) (96 97=.), gen(inc)
	keep if inc==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `issuevars' `lrvar' n
	tempfile inc98`i'
	save "`inc98`i''", replace
}

* The 1998 items for low (i=1), middle (i=2) and high (i=3) education. The
* education groups take positions 5-7 of each issue's block of seven variable
* numbers, i.e. block start + i + 3.
forval i=1/3 {
	use "$data/NKO1971-2006.dta", clear
	keep if A1==1998

	* Names of the new variables for the four issue items and for left-right
	local issuevars
	forval base=43(7)64 {
		local issuevars `issuevars' v`=`base'+`i'+3'_1
	}
	local lrvar v`=74+`i''_1

	recode V38_10 V42_11 V41_10 V37_10 (96 97 = .), gen(`issuevars')
	recode V46_2 (3=2) (4=3) (5 6=4) (7=5) (8 9=6) (10=7) (11/999=.), gen(`lrvar')

	* Three education levels; respondents outside group i (or with 96/97) are dropped
	recode D37 (1 2=1) (3 4=2) (5=3) (96 97=.), gen(edu)
	keep if edu==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `issuevars' `lrvar' n
	tempfile edu98`i'
	save "`edu98`i''", replace
}

* Dutch Parliamentary Election Studies 1989: issue positions of all citizens,
* taken from the cumulative 1971-2006 file. Only two issue items are used for
* this year; values 95-97 are set to missing.
use "$data/NKO1971-2006.dta", clear
keep if A1==1989
recode V38_10 V37_10 (95/97=.), gen(v78_1 v85_1)

* Row counter used as the merge key later on
gen n=_n
keep v78_1 v85_1 n
tempfile nko89
save "`nko89'", replace

* The 1989 items for low (i=1), middle (i=2) and high (i=3) incomes. The two
* issue blocks start at variable numbers 78 and 85; the income groups take
* positions 2-4 of a block, i.e. block start + i.
forval i=1/3 {
	use "$data/NKO1971-2006.dta", clear
	keep if A1==1989

	local incdiff v`=78+`i''_1
	local euthan v`=85+`i''_1
	recode V38_10 V37_10 (95/97=.), gen(`incdiff' `euthan')

	* Only the listed D49 codes are assigned to a group; every respondent
	* who does not end up in group i is dropped
	recode D49 (2 4=1) (6/8=2) (10 12=3) (96 97=.), gen(inc)
	keep if inc==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `incdiff' `euthan' n
	tempfile inc89`i'
	save "`inc89`i''", replace
}

* The 1989 items for low (i=1), middle (i=2) and high (i=3) education. The
* education groups take positions 5-7 of each issue's block of seven variable
* numbers, i.e. block start + i + 3.
forval i=1/3 {
	use "$data/NKO1971-2006.dta", clear
	keep if A1==1989

	local incdiff v`=81+`i''_1
	local euthan v`=88+`i''_1
	recode V38_10 V37_10 (95/97=.), gen(`incdiff' `euthan')

	* No missing codes are recoded here; any other value of D37 simply never
	* equals 1, 2 or 3 and is therefore dropped by the keep below
	recode D37 (1 2=1) (3 4=2) (5=3), gen(edu)
	keep if edu==`i'

	* Row counter within the group, used as the merge key later on
	gen n=_n
	keep `incdiff' `euthan' n
	tempfile edu89`i'
	save "`edu89`i''", replace
}

* Put all citizen distributions side by side. The files are matched on the row
* counter n, so a row does NOT describe one respondent: every column is simply
* a list of answers of one group, and shorter columns are padded with missings.
use "`nko'", clear

* The list holds the names of the tempfile macros created above (not variable
* names), hence "foreach ... in". The double macro expansion first inserts the
* macro name and then the path stored in it; the path is quoted so that a temp
* folder containing spaces does not break the merge.
foreach part in inc1 inc2 inc3 edu1 edu2 edu3 nko98 inc981 inc982	///
	inc983 edu981 edu982 edu983 nko89 inc891 inc892 inc893 edu891 edu892 edu893 {
	quietly merge 1:1 n using "``part''", nogen
}

* This is where the distributions on the issues are generated for
* parliamentarians (see above note!)
* Each recode turns the row counter n into a column of scale values: a rule
* such as (2/7=2) creates six rows with the value 2, so the length of every
* range is the number of parliamentarians at that scale point. Rows beyond the
* last range are set to missing. A scale point that is absent from a rule list
* (e.g. 1 for v22_2, 7 for v36_2) therefore has no parliamentarians.
* Numbering follows the citizen variables: v1-v36 are the 2006 issues, v43-v71
* the 1998 issues and v78/v85 the 1989 issues.
recode n (1=1) (2/7=2) (8/24=3) (25/56=4) (57/77=5) (78/90=6) (91/108=7) (else=.), gen(v1_2)
recode n (1/4=1) (5/22=2) (23/48=3) (49/73=4) (74/85=5) (86/99=6) (100/110=7) (else=.), gen(v8_2)
recode n (1/4=1) (5/12=2) (13/27=3) (28/57=4) (58/84=5) (85/106=6) (107/111=7) (else=.), gen(v15_2)
recode n (1=2) (2/9=3) (10/42=4) (43/79=5) (80/109=6) (110 111=7) (else=.), gen(v22_2)
recode n (1/4=1) (5/12=2) (13/23=3) (24/44=4) (45/68=5) (69/95=6) (96/109=7) (else=.), gen(v29_2)
recode n (1/2=1) (3/15=2) (16/45=3) (46/62=4) (63/97=5) (98/108=6) (else=.), gen(v36_2)
recode n (1/8=2) (9/20=3) (21/48=4) (49/82=5) (83/119=6) (120/134=7) (else=.), gen(v43_2)
recode n (1/10=1) (11/38=2) (39/79=3) (80/110=4) (111/126=5) (127/132=6) (133/135=7) (else=.), gen(v50_2)
recode n (1/9=2) (10/35=3) (36/69=4) (70/110=5) (111/130=6) (131/133=7) (else=.), gen(v57_2)
recode n (1/9=1) (10/14=2) (15/22=3) (23/43=4) (44/69=5) (70/110=6) (111/129=7) (else=.), gen(v64_2)
recode n (1/4=1) (5/30=2) (31/68=3) (69/98=4) (99/122=5) (123/126=6) (127 128=7) (else=.), gen(v71_2)
recode n (1/4=1) (5/21=2) (22/55=3) (56/97=4) (98/135=5) (136/173=6) (174/211=7) (else=.), gen(v78_2)
recode n (1/21=1) (22/50=2) (51/62=3) (63/85=4) (86/116=5) (117/175=6) (176/211=7) (else=.), gen(v85_2)
* The merge key has served its purpose
drop n

* Every issue is compared for seven citizen groups, but there is only one
* distribution of parliamentarians per issue. The first variable of each block
* of seven (v1_2, v8_2, ..., v85_2) is therefore copied to the six remaining
* positions of its block, so that every citizen variable has a partner.
forval base=1(7)85 {
	forval offset=1/6 {
		local copy=`base'+`offset'
		gen v`copy'_2=v`base'_2
	}
}

* Arrange the variables pair by pair. Pairs 1-9 are placed explicitly; the
* remaining variables are then sorted alphabetically behind them.
local leading
forval pair=1/9 {
	local leading `leading' v`pair'_1 v`pair'_2
}
order `leading', first
order v15_1-v91_2, alpha after(v9_2)

* Label all 91 pairs of variables. The variables are numbered in blocks of
* seven, one block per year-issue combination, and within every block the
* citizen groups appear in the same order. The labels are therefore generated
* from the lists below instead of being typed out one by one (which had left
* stray trailing blanks in the label of v31_2).
local issues2006 `""Income differences" "European integration" "Multiculturalism" "Crime" "Euthanasia" "Left-right""'
local issues1998 `""Income differences" "European integration" "Multiculturalism" "Euthanasia" "Left-right""'
local issues1989 `""Income differences" "Euthanasia""'
local groups `""all citizens" "low income citizens" "middle income citizens" "high income citizens""'
local groups `"`groups' "low educated citizens" "middle educated citizens" "highly educated citizens""'

* id counts the pairs in the order year -> issue -> group, which is exactly
* the order in which the variables were numbered above
local id=0
foreach year in 2006 1998 1989 {
	foreach issue of local issues`year' {
		foreach grp of local groups {
			local ++id
			label var v`id'_1 "`year' `issue', `grp'"
			label var v`id'_2 "`year' `issue', representatives"
		}
	}
}

* Guard against the lists getting out of step with the 91 pairs
assert `id'==91

* Forward slashes are used in file paths because they work on every platform
save "$data/Frequencies.dta", replace

/*------------------------------------------------------------------------------
 2. Calculating congruence
------------------------------------------------------------------------------*/

/* Each variable in the constructed dataset serves as a surrogate sampling frame
from which we draw bootstrap samples. But first, we use the data to calculate
point estimates for many-to-many congruence between (groups of) citizens and
representatives.

We want to calculate congruence by comparing parliamentarians to different
groups of citizens on various issues. This results in 91 pairs of variables for
which we calculate congruence (e.g. v1_1 and v1_2). The loop below creates
a frequency distribution for each variable, divides it by the total number of
observations to get relative frequencies, and saves this as a new variable.
Then, within each pair we select the lowest of the two values at each point of
the scale, which gives us seven values that are added up to arrive at the
congruence score.

Along the way, we also calculate congruence using the cumulative distributions
(in loop "k") to check whether this produces the same results. Finally, the data
is reshaped to make the results easier to read. */

* Step 1: for every variable, store its relative frequencies (in percent of the
* non-missing observations) together with the scale values in a small tempfile.
forval i=1/91 {
	forval j=1/2 {
		* Only the variable at hand is read from disk
		use v`i'_`j' using "$data/Frequencies.dta", clear
		quietly tab v`i'_`j', matcell(freqs) matrow(scale)
		matrix f`i'_`j'=freqs*100/r(N)

		* Turn the two matrices into variables (svmat appends a 1 to the name)
		svmat f`i'_`j'
		svmat scale
		rename f`i'_`j'1 f`i'_`j'
		rename scale1 scale

		* Keep one row per scale value that occurs in the data
		keep f`i'_`j' scale
		quietly drop if scale==.
		tempfile small`i'_`j'
		quietly save "`small`i'_`j''", replace
	}
}

* Step 2: line up all distributions on a seven-point scale and compare them
* within each pair.
clear
set obs 7
gen scale=_n

forval i=1/91 {
	forval j=1/2 {
		quietly merge 1:1 scale using "`small`i'_`j''", nogen
		* Scale points that nobody chose get a relative frequency of zero
		quietly recode f`i'_`j' (.=0)

		* Cumulative distribution: mean of the first k rows times k equals
		* the sum of the first k relative frequencies
		quietly gen cf`i'_`j'=.
		forval k=1/7 {
			quietly sum f`i'_`j' in 1/`k'
			quietly replace cf`i'_`j'=r(mean)*`k' in `k'
		}
	}

	* Congruence: the overlap of the two distributions, i.e. the sum over the
	* scale points of the smaller of the two relative frequencies
	quietly egen min`i' = rowmin(f`i'_1 f`i'_2)
	quietly egen congruence`i' = total(min`i')

	* Alternative measure: summed absolute difference between the two
	* cumulative distributions
	gen diff`i'=abs(cf`i'_1-cf`i'_2)
	quietly egen cumulative`i'=total(diff`i')
}

* The totals are constant across rows, so one row suffices. Reshape to one
* row per pair, with id running from 1 to 91.
keep in 1
keep congruence* cumulative*
gen temp=1
reshape long congruence cumulative, i(temp) j(id)
drop temp

* Identifying variables: translate the pair number (id) back into the year,
* the issue and the group of citizens it stands for.

* Value labels first ...
label define group 1 "All citizens" 2 "Low incomes" 3 "Middle incomes"		///
	4 "High incomes" 5 "Low educated" 6 "Middle educated" 7 "Highly educated", replace

label define issue 1 "Income differences" 2 "European integration"			///
	 3 "Multiculturalism" 4 "Crime" 5 "Euthanasia" 6 "Left-right", replace

* ... then the variables. Pairs 1-42 belong to 2006, 43-77 to 1998 and 78-91
* to 1989; every issue occupies a block of seven consecutive pairs.
recode id (1/42=2006) (43/77=1998) (78/91=1989), gen(year)
recode id																///
	(1/7 43/49 78/84=1)													///
	(8/14 50/56=2)														///
	(15/21 57/63=3)														///
	(22/28=4)															///
	(29/35 64/70 85/91=5)												///
	(36/42 71/77=6), gen(issue)

* Within a block the groups are always numbered 1-7, so group simply cycles
* through 1, 2, ..., 7 down the rows
egen group=seq(), from(1) to(7) block(1)

label values group group
label values issue issue

order id year issue group congruence cumulative

/*------------------------------------------------------------------------------
 3. Bootstrap
------------------------------------------------------------------------------*/

/* The loops below draw a thousand random samples with replacement for each
variable. For each sample of each variable, the relative frequencies are saved
as a new variable. This is then saved as a temporary file, after which all
relative frequencies are merged. (The variable 'scale' helps with the merge.)
Next, congruence is calculated in the same way as before, except this time it's
calculated for each pair of bootstrap samples. Finally, these estimates are
saved for each pair of variables, again as temporary files, and merged. Note
that both loops have to be executed at once, otherwise Stata will delete the
temporary files before the merge takes place. Also, note that the command uses
the dataset that was made above ("Frequencies"). */

* The seed makes the bootstrap reproducible. The samples are drawn in the
* order pair (i) -> side (j) -> replication (k); changing that order, or the
* number of draws, changes every subsequent sample.
set seed 55478

* One tempfile is reused as the sampling frame of the variable at hand
tempfile frame

forval i=1/91 {
	forval j=1/2 {
		* Read the non-missing values of this variable from disk once, instead
		* of re-reading the full dataset for each of the 1,000 replications
		use v`i'_`j' using "$data/Frequencies.dta", clear
		quietly drop if v`i'_`j'==.
		quietly save "`frame'", replace

		forval k=1/1000 {
			use "`frame'", clear

			* Resample with replacement, same size as the original sample
			bsample _N

			* Relative frequencies (in percent) in the bootstrap sample. The
			* matrix name is reused so that matrices do not pile up in memory.
			quietly tab v`i'_`j', matcell(freqs) matrow(scale)
			matrix bsfreq=freqs*100/r(N)
			svmat bsfreq
			svmat scale
			rename bsfreq1 bs`i'_`j'`k'
			rename scale1 scale

			* Keep one row per scale value that occurs in the sample
			keep bs`i'_`j'`k' scale
			quietly keep in 1/7
			quietly drop if scale==.
			tempfile small`i'_`j'`k'
			quietly save "`small`i'_`j'`k''", replace
		}
	}

	* Line up the 2 x 1,000 resampled distributions of pair i on the scale
	clear
	set obs 7
	gen scale=_n

	forval j=1/2 {
		forval k=1/1000 {
			quietly merge 1:1 scale using "`small`i'_`j'`k''", nogen
			* Scale points absent from a sample get a frequency of zero
			quietly recode bs`i'_`j'`k' (.=0)
		}
	}

	* Make room for one congruence estimate per replication; from here on
	* scale is only a row counter that serves as the merge key below
	quietly set obs 1000
	quietly replace scale=_n
	quietly gen congruence`i'=.

	forval k=1/1000 {
		* Overlap of the k-th citizen sample and the k-th parliament sample
		quietly egen min`i'_`k'=rowmin(bs`i'_1`k' bs`i'_2`k')
		quietly egen cong`i'_`k'=total(min`i'_`k')
		quietly replace congruence`i'=cong`i'_`k' in `k'

		* Drop the helpers straight away so that the dataset does not grow
		* by another 2,000 variables per pair
		drop min`i'_`k' cong`i'_`k'
	}

	keep congruence* scale

	tempfile small`i'
	quietly save "`small`i''", replace
}

* Combine the 1,000 estimates of all 91 pairs into one dataset
clear
set obs 1000
gen scale=_n

forval i=1/91 {
	quietly merge 1:1 scale using "`small`i''", nogen
}

keep congruence*

* Label the 91 columns of bootstrap estimates. The numbering is the same as
* for the variable pairs: blocks of seven per year-issue combination, with the
* citizen groups in a fixed order within each block. The labels are generated
* from the lists below instead of being typed out one by one.
local issues2006 `""Income differences" "European integration" "Multiculturalism" "Crime" "Euthanasia" "Left-right""'
local issues1998 `""Income differences" "European integration" "Multiculturalism" "Euthanasia" "Left-right""'
local issues1989 `""Income differences" "Euthanasia""'
local groups `""all citizens" "low income citizens" "middle income citizens" "high income citizens""'
local groups `"`groups' "low educated citizens" "middle educated citizens" "highly educated citizens""'

local id=0
foreach year in 2006 1998 1989 {
	foreach issue of local issues`year' {
		foreach grp of local groups {
			local ++id
			label var congruence`id' "`year' `issue', `grp'"
		}
	}
}

* Guard against the lists getting out of step with the 91 columns
assert `id'==91

* The output is saved because it will be used for some of the graphs.
* Forward slashes are used in file paths because they work on every platform.
save "$data/Bootstrap output.dta", replace

/*------------------------------------------------------------------------------
 4. Control variables
------------------------------------------------------------------------------*/

/* Incorporating control variables is slightly more cumbersome than usual, as
reflected in the many loops below. The idea is the same as above, namely to save
the distributions of groups of citizens on the issue questions. The difference
is that there are no longer 7 groups of citizens, but 146, representing
different combinations of control variables (e.g. citizens with low incomes but
a high level of education and a medium level of political knowledge). */

* Load the 2006 election study; all grouping variables of this part are
* derived from it.
use "$data\NKO2006.dta", clear

* Main grouping variables: education (from V430) and income (from V413), each
* collapsed into three levels. Codes 997 and 998 are set to missing.
recode V430 (1 2=1) (3 4=2) (5=3) (997=.), gen(edu)
recode V413 (1/7=1) (8/14=2) (15/20=3) (998=.), gen(inc)

* Participation: 1 by default, 2 if V224, V226 and V510 all equal 1, and 3 if
* such a respondent also has a 1 on at least one of V771 to V778.
gen part=1
recode part (1=2) if V224==1 & V226==1 & V510==1
recode part (2=3) if V771==1 | V772==1 | V773==1 | V774==1 | V775==1 |		///
	V776==1 | V777==1 | V778==1

* Control variables, numbered so that the loops below can address them by
* index. cntrl1 and cntrl2 are plain copies of inc and part. Judging by the
* value labels defined further down, cntrl3 is political knowledge, cntrl4 is
* age, cntrl5 is gender and cntrl6 is ethnicity.
recode inc part (.=.), gen(cntrl1 cntrl2)
recode V254 (0 1=1) (2=2) (3 4=3) (995 996=.), gen(cntrl3)
* Presumably V421 is the year of birth, so that 2006-V421 is the age in the
* survey year (to be confirmed against the codebook). Values outside 18-99
* are left unchanged by the recode.
gen cntrl4=2006-V421
recode cntrl4 (18/39=1) (40/59=2) (60/99=3)
gen cntrl5=V420
recode V432 (3=2), gen(cntrl6)

* The prepared data is reloaded many times below. The temporary file name is
* quoted because the path of the temporary directory may contain spaces.
tempfile control
save "`control'"

* First, we combine control variables with income. Only the lowest (1) and
* highest (3) income levels are extracted, and only for the issue in V145.
* The answers of every subgroup are stored in a temporary file of their own,
* together with a running number n, so that all subgroups can be merged side
* by side afterwards. Variables are named a<set>_<income level><control
* level>[<level of second control>], and the temporary files carry the same
* suffix with prefix i. References to temporary files are quoted because the
* path of the temporary directory may contain spaces.
forval i=1(2)3 {

	* Sets 01-04: one control variable at a time, three-level controls
	* (cntrl1 to cntrl4).
	forval j=1(1)3 {
		forval k=1(1)4 {
			use "`control'", clear
			* Income is the main variable here, so the first control has to
			* hold education instead of income.
			drop cntrl1
			gen cntrl1=edu
			keep if inc==`i' & cntrl`k'==`j'
			gen n=_n
			recode V145 (990/999=.), gen(a0`k'_`i'`j')
			keep a0`k'_`i'`j' n
			tempfile i`k'_`i'`j'
			save "`i`k'_`i'`j''"
		}
	}

	* Sets 05-06: one control variable at a time, two-level controls (cntrl5
	* and cntrl6). cntrl1 is not referenced here, so it is left untouched.
	forval j=1(1)2 {
		forval k=5(1)6 {
			use "`control'", clear
			keep if inc==`i' & cntrl`k'==`j'
			gen n=_n
			recode V145 (990/999=.), gen(a0`k'_`i'`j')
			keep a0`k'_`i'`j' n
			tempfile i`k'_`i'`j'
			save "`i`k'_`i'`j''"
		}
	}

	* Next, the same control variables are used, but education is now a
	* second control. Sets 07-09: cntrl2 to cntrl4 crossed with education.
	forval j=1(1)3 {
		forval k=2(1)4 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if inc==`i' & cntrl`k'==`j' & edu==`l'
				gen n=_n
				recode V145 (990/999=.), gen(a0`=`k'+5'_`i'`j'`l')
				keep a0`=`k'+5'_`i'`j'`l' n
				tempfile i`=`k'+5'_`i'`j'`l'
				save "`i`=`k'+5'_`i'`j'`l''"
			}
		}
	}

	* Sets 10-11: cntrl5 and cntrl6 crossed with education.
	forval j=1(1)2 {
		forval k=5(1)6 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if inc==`i' & cntrl`k'==`j' & edu==`l'
				gen n=_n
				recode V145 (990/999=.), gen(a`=`k'+5'_`i'`j'`l')
				keep a`=`k'+5'_`i'`j'`l' n
				tempfile i`=`k'+5'_`i'`j'`l'
				save "`i`=`k'+5'_`i'`j'`l''"
			}
		}
	}

	* Finally, we control for political knowledge and political
	* participation, and age and political participation, respectively.
	* Sets 12-13: cntrl3 and cntrl4 crossed with participation.
	forval j=1(1)3 {
		forval k=3(1)4 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if inc==`i' & cntrl`k'==`j' & part==`l'
				gen n=_n
				recode V145 (990/999=.), gen(a`=`k'+9'_`i'`j'`l')
				keep a`=`k'+9'_`i'`j'`l' n
				tempfile i`=`k'+9'_`i'`j'`l'
				save "`i`=`k'+9'_`i'`j'`l''"
			}
		}
	}
}

* In the loops below, the same is done for education as was done for income
* above. The difference is that we focus on all five issues for education
* (V145, V195, V185, V165 and V135, stored with the prefixes b to f) instead
* of just the one about income differences for income. Only the lowest (1)
* and highest (3) education levels are extracted. The temporary files carry
* the prefix v. References to temporary files are quoted because the path of
* the temporary directory may contain spaces.
forval i=1(2)3 {

	* Sets 01-04: one control variable at a time, three-level controls
	* (cntrl1 to cntrl4, where cntrl1 is income).
	forval j=1(1)3 {
		forval k=1(1)4 {
			use "`control'", clear
			keep if edu==`i' & cntrl`k'==`j'
			gen n=_n
			recode V145 V195 V185 V165 V135 (990/999=.),					///
				gen(b0`k'_`i'`j' c0`k'_`i'`j' d0`k'_`i'`j'					///
				e0`k'_`i'`j' f0`k'_`i'`j')
			keep *`k'_`i'`j' n
			tempfile v`k'_`i'`j'
			save "`v`k'_`i'`j''"
		}
	}

	* Sets 05-06: one control variable at a time, two-level controls (cntrl5
	* and cntrl6).
	forval j=1(1)2 {
		forval k=5(1)6 {
			use "`control'", clear
			keep if edu==`i' & cntrl`k'==`j'
			gen n=_n
			recode V145 V195 V185 V165 V135 (990/999=.),					///
				gen(b0`k'_`i'`j' c0`k'_`i'`j' d0`k'_`i'`j'					///
				e0`k'_`i'`j' f0`k'_`i'`j')
			keep *`k'_`i'`j' n
			tempfile v`k'_`i'`j'
			save "`v`k'_`i'`j''"
		}
	}

	* Sets 07-09: cntrl2 to cntrl4 crossed with income as a second control.
	forval j=1(1)3 {
		forval k=2(1)4 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if edu==`i' & cntrl`k'==`j' & inc==`l'
				gen n=_n
				recode V145 V195 V185 V165 V135 (990/999=.),				///
					gen(b0`=`k'+5'_`i'`j'`l' c0`=`k'+5'_`i'`j'`l'			///
					d0`=`k'+5'_`i'`j'`l' e0`=`k'+5'_`i'`j'`l'				///
					f0`=`k'+5'_`i'`j'`l')
				keep *`=`k'+5'_`i'`j'`l' n
				tempfile v`=`k'+5'_`i'`j'`l'
				save "`v`=`k'+5'_`i'`j'`l''"
			}
		}
	}

	* Sets 10-11: cntrl5 and cntrl6 crossed with income as a second control.
	forval j=1(1)2 {
		forval k=5(1)6 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if edu==`i' & cntrl`k'==`j' & inc==`l'
				gen n=_n
				recode V145 V195 V185 V165 V135 (990/999=.),				///
					gen(b`=`k'+5'_`i'`j'`l' c`=`k'+5'_`i'`j'`l'				///
					d`=`k'+5'_`i'`j'`l' e`=`k'+5'_`i'`j'`l'					///
					f`=`k'+5'_`i'`j'`l')
				keep *`=`k'+5'_`i'`j'`l' n
				tempfile v`=`k'+5'_`i'`j'`l'
				save "`v`=`k'+5'_`i'`j'`l''"
			}
		}
	}

	* Sets 12-13: cntrl3 and cntrl4 crossed with participation as a second
	* control.
	forval j=1(1)3 {
		forval k=3(1)4 {
			forval l=1(1)3 {
				use "`control'", clear
				keep if edu==`i' & cntrl`k'==`j' & part==`l'
				gen n=_n
				recode V145 V195 V185 V165 V135 (990/999=.),				///
					gen(b`=`k'+9'_`i'`j'`l' c`=`k'+9'_`i'`j'`l'				///
					d`=`k'+9'_`i'`j'`l' e`=`k'+9'_`i'`j'`l'					///
					f`=`k'+9'_`i'`j'`l')
				keep *`=`k'+9'_`i'`j'`l' n
				tempfile v`=`k'+9'_`i'`j'`l'
				save "`v`=`k'+9'_`i'`j'`l''"
			}
		}
	}
}

* Start from an empty frame of 500 rows that only holds the running number n.
* Subgroups with more than 500 respondents simply add rows during the merge.
clear
set obs 500
gen n=_n

* Next, all variables are merged. The loops mirror the loops that created the
* temporary files: for every combination, first the income file (prefix i)
* and then the education file (prefix v) is added, matched on n. The file
* names are quoted because the path of the temporary directory may contain
* spaces.
forval i=1(2)3 {

	* Sets 01-04.
	forval j=1(1)3 {
		forval k=1(1)4 {
			quietly merge 1:1 n using "`i`k'_`i'`j''", nogen
			quietly merge 1:1 n using "`v`k'_`i'`j''", nogen
		}
	}

	* Sets 05-06.
	forval j=1(1)2 {
		forval k=5(1)6 {
			quietly merge 1:1 n using "`i`k'_`i'`j''", nogen
			quietly merge 1:1 n using "`v`k'_`i'`j''", nogen
		}
	}

	* Sets 07-09.
	forval j=1(1)3 {
		forval k=2(1)4 {
			forval l=1(1)3 {
				quietly merge 1:1 n using "`i`=`k'+5'_`i'`j'`l''", nogen
				quietly merge 1:1 n using "`v`=`k'+5'_`i'`j'`l''", nogen
			}
		}
	}

	* Sets 10-11.
	forval j=1(1)2 {
		forval k=5(1)6 {
			forval l=1(1)3 {
				quietly merge 1:1 n using "`i`=`k'+5'_`i'`j'`l''", nogen
				quietly merge 1:1 n using "`v`=`k'+5'_`i'`j'`l''", nogen
			}
		}
	}

	* Sets 12-13.
	forval j=1(1)3 {
		forval k=3(1)4 {
			forval l=1(1)3 {
				quietly merge 1:1 n using "`i`=`k'+9'_`i'`j'`l''", nogen
				quietly merge 1:1 n using "`v`=`k'+9'_`i'`j'`l''", nogen
			}
		}
	}
}

* The variables are renamed in the same format as above. They are first put
* in alphabetical order directly after n, and then numbered consecutively in
* that order: the first becomes v1_1, the second v2_1, and so on.
order a01_11-f13_333, alpha after(n)

local pos 0
foreach v of varlist a01_11-f13_333 {
	local ++pos
	rename `v' v`pos'_1
	}

* Again, the distributions for parliamentarians are created manually and
* duplicated. Each recode turns the row number n into a scale value, so that
* the number of rows per value reproduces the frequency of that answer (e.g.
* in v1_2 one row has value 1 and rows 2 to 7 have value 2). Rows beyond the
* last range are set to missing. One distribution is entered for the first
* variable of every block of 146; v1_2 and v147_2 are identical because the
* first two blocks use the same issue.
recode n (1=1) (2/7=2) (8/24=3) (25/56=4) (57/77=5) (78/90=6) (91/108=7) (else=.), gen(v1_2)
recode n (1=1) (2/7=2) (8/24=3) (25/56=4) (57/77=5) (78/90=6) (91/108=7) (else=.), gen(v147_2)
recode n (1/4=1) (5/22=2) (23/48=3) (49/73=4) (74/85=5) (86/99=6) (100/110=7) (else=.), gen(v293_2)
recode n (1/4=1) (5/12=2) (13/27=3) (28/57=4) (58/84=5) (85/106=6) (107/111=7) (else=.), gen(v439_2)
recode n (1=2) (2/9=3) (10/42=4) (43/79=5) (80/109=6) (110 111=7) (else=.), gen(v585_2)
recode n (1/4=1) (5/12=2) (13/23=3) (24/44=4) (45/68=5) (69/95=6) (96/109=7) (else=.), gen(v731_2)

* Copy the distribution of each block to the other 145 pairs of that block.
forval i=1(146)731 {
	forval j=1/145 {
		gen v`=`i'+`j''_2=v`i'_2
	}
}

* Place every parliament variable directly after its citizen counterpart.
forval i=1/876 {
	order v`i'_2, after(v`i'_1)
}

* Store the paired data in a fresh temporary file. The name is quoted because
* the path of the temporary directory may contain spaces.
tempfile control
save "`control'", replace

* Next, congruence is calculated for all 876 pairs of variables in the same way
* as in part 2, the only difference being that we don't calculate cumulative
* congruence anymore.
*
* This first step stores, for both members of every pair, the percentage
* distribution over the scale values in a small temporary file with the
* variables scale and f<pair>_<member>. The paired data is reloaded for every
* variable; its file name is quoted because the path of the temporary
* directory may contain spaces.
forval i=1/876 {
	forval j=1/2 {
		use "`control'", clear
		* freqs holds the counts and scale the corresponding values; r(N) is
		* the number of observations tabulated.
		quietly tab v`i'_`j', matcell(freqs) matrow(scale)
		matrix f`i'_`j'=freqs*100/r(N)
		* svmat appends a column number to the matrix name, hence the
		* renames.
		svmat f`i'_`j'
		svmat scale
		rename f`i'_`j'1 f`i'_`j'
		rename scale1 scale

		keep f`i'_`j' scale
		drop if scale==.
		tempfile small`i'_`j'
		quietly save "`small`i'_`j''", replace
	}
}

* Frame with one row for each of the seven scale values.
clear
set obs 7
gen scale=_n

* For every pair, both percentage distributions are attached to the frame and
* scale values without any answers are counted as zero percent. Congruence is
* the overlap of the two distributions: the smaller of the two percentages
* per scale value, summed over all scale values.
forvalues p=1/876 {
	foreach s of numlist 1 2 {
		quietly merge 1:1 scale using "`small`p'_`s''", nogen
		recode f`p'_`s' (.=0)
	}

	quietly egen min`p' = rowmin(f`p'_1 f`p'_2)
	quietly egen congruence`p' = total(min`p')
	drop min`p' f`p'_1 f`p'_2
}

* The totals are identical in every row, so one row suffices. The data is
* then turned into one row per pair, numbered by id2.
keep in 1
reshape long congruence, i(scale) j(id2)
drop scale

* Identifying variables. The statements below rely on the rows being in the
* order of id2 (1-876), i.e. in six consecutive blocks of 146 pairs (this is
* assumed to be the order left by the reshape above).
*
* issue: the block number, shifted down by one from the second block onwards.
* The first two blocks therefore both get issue 1, which leaves five issues.
egen issue=seq(), from(1) to(6) block(146)
replace issue=issue-1 if id2>146
* temp: position of the pair within its block of 146. It is only a helper for
* the recodes below and is dropped at the end.
egen temp=seq(), from(1) to(146) block(1)

* group: 2 for the listed positions and 1 for all others. Given the
* alphabetical variable order created before the renaming, the listed
* positions should be those of the highest level (3) of the main variable and
* the others those of the lowest level (1). In the first block (id2 below 147)
* the codes are raised to 3 and 4; these are the codes used for the income gap
* after the reshape to wide format, whereas 1 and 2 are used for the education
* gap.
recode temp (4/6 10/12 16/18 22/24 27 28 31 32 42/50 60/68 78/86 93/98		///
	105/110 120/128 138/146=2) (else=1), gen(group)
replace group=group+2 if id2<147
* control1: category of the (first) control variable, numbered 1 to 16 as in
* the value label 'control' defined below. All positions that are not listed
* explicitly receive code 12.
recode temp (1 4=1) (2 5=2) (3 6=3) (7 10 33/35 42/44=4) (8 11 36/38		///
	45/47=5) (9 12 39/41 48/50=6) (13 16 51/53 60/62 111/113 120/122=7)		///
	(14 17 54/56 63/65 114/116 123/125=8) (15 18 57/59 66/68 117/119		///
	126/128=9) (19 22 69/71 78/80 129/131 138/140=10) (20 23 72/74 81/83	///
	132/134 141/143=11) (25 27 87/89 93/95=13) (26 28 90/92 96/98=14)		///
	(29 31 99/101 105/107=15) (30 32 102/104 108/110=16) (else=12), gen(control1)

* control2: category of the second control variable. The sequence 1, 2, 3 is
* generated over the rows with temp above 2 only; this leaves 144 rows per
* block, a multiple of three, so the cycle presumably starts at 1 at the same
* position in every block (this assumes that seq() numbers only the rows
* selected by the if-condition). Positions above 110 are raised to 4-6, and
* positions below 33, where only one control variable is used, are set to 0.
egen control2=seq() if temp>2, from(1) to(3) block(1)
replace control2=control2+3 if temp>110
replace control2=0 if temp<33
drop temp

/* A remark on the value labels: the first three values of 'control1' and
'control2' are labelled "Income 1", "Income 2" and "Income 3". That is correct
for the five issues where the main effect is that of education. For the one
issue where the main effect is income, the control is of course not income but
education. Because the data is reshaped below, using one set of labels is
nevertheless the most convenient way to structure the data. */
label define issue 1 "Income differences" 2 "European integration"			///
	3 "Multiculturalism" 4 "Crime" 5 "Euthanasia", replace

* The labels of the control categories are defined one control at a time.
label define control 1 "Income 1" 2 "Income 2" 3 "Income 3", replace
label define control 4 "Participation 1" 5 "Participation 2" 6 "Participation 3", add
label define control 7 "Knowledge 1" 8 "Knowledge 2" 9 "Knowledge 3", add
label define control 10 "Age 1" 11 "Age 2" 12 "Age 3", add
label define control 13 "Gender 1" 14 "Gender 2", add
label define control 15 "Ethnicity 1" 16 "Ethnicity 2", add

label values issue issue
foreach c of varlist control1 control2 {
	label values `c' control
	}

* One id per combination of issue and control categories. The rows of a
* combination differ only in 'group', so reshaping puts the congruence of the
* groups next to each other as congruence1 to congruence4.
egen id=group(issue control2 control1)

drop id2
reshape wide congruence, i(id) j(group)

* Code 0 marked the absence of a second control variable.
replace control2=. if control2==0
order id issue control1 control2 congruence1 congruence2 congruence3 congruence4

* Gaps in congruence: group 2 minus group 1 for education and group 4 minus
* group 3 for income.
gen edu_gap=congruence2-congruence1
gen inc_gap=congruence4-congruence3

* For each control variable, or combination of control variables, we can now
* calculate the main effect (the difference between high and low education/
* income), averaged over the categories of the control variable(s). Every
* mean is kept in a 1x1 matrix so that the means can be combined afterwards.
* Matrix names consist of a prefix, the issue number and the first category
* code of the control variable. Prefixes: e0 and i0 for a single control, ei
* and ie for a second control with codes 1-3, ep and ip for a second control
* with codes 4-6.

* Education gap, all five issues.
forvalues iss=1/5 {

	* Three-category controls (codes lo to lo+2), no second control.
	foreach lo of numlist 1 4 7 10 {
		quietly summarize edu_gap if issue==`iss' & inrange(control1,`lo',`lo'+2) & control2==.
		matrix e0_`iss'`lo'=r(mean)
	}

	* Three-category controls, second control with codes 1-3.
	foreach lo of numlist 4 7 10 {
		quietly summarize edu_gap if issue==`iss' & inrange(control1,`lo',`lo'+2) & control2!=. & control2<4
		matrix ei_`iss'`lo'=r(mean)
	}

	* Three-category controls, second control with codes 4-6.
	foreach lo of numlist 7 10 {
		quietly summarize edu_gap if issue==`iss' & inrange(control1,`lo',`lo'+2) & control2!=. & control2>3
		matrix ep_`iss'`lo'=r(mean)
	}

	* Two-category controls (codes lo and lo+1), without and with a second
	* control with codes 1-3.
	foreach lo of numlist 13 15 {
		quietly summarize edu_gap if issue==`iss' & inrange(control1,`lo',`lo'+1) & control2==.
		matrix e0_`iss'`lo'=r(mean)
		quietly summarize edu_gap if issue==`iss' & inrange(control1,`lo',`lo'+1) & control2!=. & control2<4
		matrix ei_`iss'`lo'=r(mean)
	}
}

* Income gap, first issue only; same structure as for education.
foreach lo of numlist 1 4 7 10 {
	quietly summarize inc_gap if issue==1 & inrange(control1,`lo',`lo'+2) & control2==.
	matrix i0_1`lo'=r(mean)
}

foreach lo of numlist 4 7 10 {
	quietly summarize inc_gap if issue==1 & inrange(control1,`lo',`lo'+2) & control2!=. & control2<4
	matrix ie_1`lo'=r(mean)
}

foreach lo of numlist 7 10 {
	quietly summarize inc_gap if issue==1 & inrange(control1,`lo',`lo'+2) & control2!=. & control2>3
	matrix ip_1`lo'=r(mean)
}

foreach lo of numlist 13 15 {
	quietly summarize inc_gap if issue==1 & inrange(control1,`lo',`lo'+1) & control2==.
	matrix i0_1`lo'=r(mean)
	quietly summarize inc_gap if issue==1 & inrange(control1,`lo',`lo'+1) & control2!=. & control2<4
	matrix ie_1`lo'=r(mean)
}

* The matrix of gaps is built row by row. The first row holds the gaps
* without control variables; the values are entered by hand.
matrix gap=19.78,20.43,27.33,41.12,35.55,16.85

* Rows 2-7: one control variable. Every row holds the income gap for the
* first issue followed by the education gaps for issues 1 to 5.
foreach c of numlist 1 4 7 10 13 15 {
	matrix gap=gap \ (i0_1`c',e0_1`c',e0_2`c',e0_3`c',e0_4`c',e0_5`c')
	}

* Rows 8-12: a second control variable with codes 1-3.
foreach c of numlist 4 7 10 13 15 {
	matrix gap=gap \ (ie_1`c',ei_1`c',ei_2`c',ei_3`c',ei_4`c',ei_5`c')
	}

* Rows 13-14: a second control variable with codes 4-6.
foreach c of numlist 7 10 {
	matrix gap=gap \ (ip_1`c',ep_1`c',ep_2`c',ep_3`c',ep_4`c',ep_5`c')
	}

matrix rownames gap=None Income Participation Knowledge Age Gender Ethnicity ///
Income+Part. Income+Know. Income+Age Income+Gender Income+Ethn. Part.+Know. Part.+Age
matrix colnames gap=Inc.Diff.(Inc.) Inc.Diff.(Edu.) Eur.Int. Multicult. Crime Euthanasia

/* The matrix shows the size of the main effect when controlling for the
variables named in the rows. The first row, the effect without control
variables, contains the values obtained after calculating congruence in part 2
above.

For the first column, 'Income' in the row names should be read as 'Education'.
For instance, the row Income+Gender does not control for income and gender in
the first column, but for education and gender. */
matrix list gap
